package com.metis.document.parse.dialog.pipeline.impl;

import com.metis.document.parse.dialog.pipeline.TitleTextPostProcess;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.NotNull;
import org.springframework.stereotype.Component;

@Slf4j
@Component
public class TitleTextPostProcessBySpace implements TitleTextPostProcess {
    @Override
    public String process(@NotNull String input) {
        input = input
                .replaceAll("(?<=\\d)\\s*(?=[\\u4e00-\\u9fa5])|(?<=[\\u4e00-\\u9fa5])\\s*(?=\\d)","")
                .replaceAll("(?<=\\d)\\s*(?=[,，])|(?<=[,，])\\s*(?=\\d)","")
                .replaceAll("\\s+"," ")
                .replaceAll("[_-一-]\\s\\d\\s[-_-一]"," ")
                .replaceAll("\n+","\n")
                .replaceAll("\\\\u[0-9a-fA-F]{4}","")
                .replaceAll("[目]\\s*[录]|[目]\\s*[次]|[目][\\s　]*[次录]","")
        ;

        return input;
    }
}
